** Clearing Stata memory
* Close any log left open by a previous run; capture suppresses the error when no log is open.
capture log close
clear all
* Session-only setting: the perm option would rewrite the user's saved Stata preferences,
* which a replication script should not do.
set more off
* NOTE(review): no command in the visible part of this script draws random numbers; seed kept for safety.
set seed 1234

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// Table O.25: Priority Subjects and Gender Performance Gap: School Location ////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

** Opening Phase 2 norm_scores dataset
use "Work Data/Gender_Phase2_long.dta", clear

*** Creating variables
* Numeric, value-labelled copy of the string variable subject
encode subject, gen(sub)
label var sub "Subject"

* One 0/1 indicator per distinct value of subject: d_sub1, d_sub2, ... in the sorted order of the values
tab subject, gen(d_sub)
* The renames below are positional and hard-code eight subjects; stop here if the data disagree
assert r(r) == 8

** Subject dummies
* NOTE(review): assumes the sorted subject codes line up with this alphabetical list of names
* (for instance that the codes lang and port sort 5th and 8th) - confirm against the tab output above.
rename d_sub1 Biology
rename d_sub2 Chemistry
rename d_sub3 Geography
rename d_sub4 History
rename d_sub5 Language
rename d_sub6 Mathematics
rename d_sub7 Physics
rename d_sub8 Portuguese
* Labels
label var Biology "Biology"
label var Chemistry "Chemistry"
label var Geography "Geography"
label var History "History"
* Full variable name: the abbreviation Math only resolves while varabbrev is on and unambiguous
label var Mathematics "Mathematics"
label var Physics "Physics"
label var Portuguese "Portuguese"
label var Language "Foreign Language"

** Interaction: priority X female
gen fem_priority = female * priority
label var fem_priority "Female $\times$ Priority"

** Subject-specific interactions, one set per subject dummy:
**   fem_SUBJ      = female X subject
**   prio_SUBJ     = priority X subject
**   fem_prio_SUBJ = female X priority X subject
foreach subj of varlist Biology-Portuguese {
    gen fem_`subj' = `subj' * female
    label var fem_`subj' "Female $\times$ `subj'"

    gen prio_`subj' = priority * `subj'
    label var prio_`subj' "Priority $\times$ `subj'"

    gen fem_prio_`subj' = fem_priority * `subj'
    label var fem_prio_`subj' "Female $\times$ Priority $\times$ `subj'"
}

* Regressor lists used by the regressions: subject dummies and their female interactions.
* Biology is not listed (presumably the omitted reference subject - confirm);
* Language and Portuguese are dropped from the sample further down.
global subject "Chemistry Geography History Mathematics Physics"
global subject_fem "fem_Chemistry fem_Geography fem_History fem_Mathematics fem_Physics"

** P1 scores: powers 2 to 4 of the normalized subject-specific Phase 1 score
forvalues power = 2/4 {
    gen norm_p1score`power' = norm_p1score^`power'
    summarize norm_p1score`power'
}

*********************************************************************************
****************   Relative performances ****************************************
*********************************************************************************

** ENEM

* Relative ENEM performance: deviation of norm_enem_w from its mean within each year-by-gender cell
bysort year female: egen norm_enem_w_ave_g = mean(norm_enem_w)
gen norm_enem_w_g = norm_enem_w - norm_enem_w_ave_g
bysort year female: summarize norm_enem_w_g

* The cell mean was only needed to build the deviation
drop norm_enem_w_ave_g

* Interaction: subject X ENEM
* For every subject dummy: relative ENEM score X subject, its female counterpart,
* and powers 2 to 4 of both.
foreach subj of varlist Biology-Portuguese {
    gen enem_`subj' = `subj' * norm_enem_w_g
    label var enem_`subj' "ENEM $\times$ `subj'"

    gen fem_enem_`subj' = female * norm_enem_w_g * `subj'
    label var fem_enem_`subj' "Female $\times$ ENEM $\times$ `subj'"

    forvalues power = 2/4 {
        gen enem_`subj'_`power' = enem_`subj'^`power'
        gen fem_enem_`subj'_`power' = fem_enem_`subj'^`power'
    }

    summarize enem_`subj'* fem_enem_`subj'*
}

* Each wildcard picks up the level term and its three powers for one subject
global g_pol_enem_sub "enem_Chemistry* enem_Geography* enem_History* enem_Mathematics* enem_Physics*"
describe $g_pol_enem_sub

* Priority x relative performance in ENEM:
* level term first, then powers 2 to 4 of the relative ENEM score, each multiplied by priority
gen norm_enem_w_priority_g = norm_enem_w_g * priority
forvalues power = 2/4 {
    gen norm_enem_w_priority_g`power' = norm_enem_w_g^`power' * priority
    summarize norm_enem_w_priority_g`power'
}

* Wildcard covers the level term and the three power terms created above
global g_norm_enem_w_prio norm_enem_w_priority_g*
describe $g_norm_enem_w_prio

** Phase 1 scores

foreach v in norm_p1score {

    * Mean of the raw score by year, for reference
    tab year, sum(`v')

    * Relative score: deviation from the mean of the year-by-subject-by-gender cell
    bys year subject female: egen gs_`v'_ave = mean(`v')
    gen gs_`v' = `v' - gs_`v'_ave
    bys year female subject: sum gs_`v'
    drop gs_`v'_ave

    * Powers 2 to 4 of the relative score
    forvalues i = 2/4 {
        gen gs_`v'`i' = gs_`v'^`i'
        sum gs_`v'`i'
    }

    * Fourth-order polynomial in the relative score.
    * The braces delimit the global's name explicitly when a local is spliced into it.
    global gs_pol_`v' gs_`v' gs_`v'2 gs_`v'3 gs_`v'4
    d ${gs_pol_`v'}

    * Priority x Phase 1 scores: level term plus the three power terms
    gen gs_`v'_prio = gs_`v' * priority
    forvalues i = 2/4 {
        gen gs_`v'_prio`i' = gs_`v'`i' * priority
    }
    * Summarize once, after all four interaction terms exist (previously repeated on every pass of the loop)
    sum gs_`v'_prio*
}

* Wildcard covers gs_norm_p1score_prio and its power terms 2 to 4
global gs_pol_norm_p1score_prio gs_norm_p1score_prio*
d $gs_pol_norm_p1score_prio

*********************************************************************************
**************** Main sample ****************************************************
*********************************************************************************

* 1) Only years before the affirmative action took place
drop if aa_year == 1
drop if year == 2000
tabulate year

* 2) Drop Portuguese and Foreign Language: Phase 1 has no Portuguese or Foreign Language exam
*    (for Portuguese, Phase 1 has an essay instead)
tabulate subject, summarize(norm_p1score)
drop if inlist(subject, "lang", "port")
tabulate subject, summarize(norm_p1score)

* Remove the dummies and priority interactions of the two excluded subjects.
* NOTE(review): the fem_ and enem_ interactions of these subjects are left in the data, as before.
drop Language Portuguese prio_Language prio_Portuguese fem_prio_Language fem_prio_Portuguese
 
 *** Identifying Campinas Metropolitan Region

* Diagnostics on the municipality code: values by year, missingness, storage type and number of digits
tabulate year, summarize(co_municipio)
mdesc co_municipio
describe co_municipio
* NOTE(review): co_mun is presumably an abbreviation of co_municipio - confirm no separate variable has that name
inspect co_mun
tostring co_mun, gen(codmun_str)
gen number_digits = length(codmun_str)
replace number_digits = . if co_municipio == .
tabulate number_digits // 7-digits IBGE code
drop codmun_str number_digits

* Source: IBGE + https://www.al.sp.gov.br/repositorio/legislacao/lei.complementar/2000/lei.complementar-870-19.06.2000.html
* Indicator: 1 for the twenty municipalities listed below, 0 for any other non-missing code,
* missing when the municipality code is missing.
gen Campinas_metropolitan_region = 0 if co_municipio != .
replace Campinas_metropolitan_region = 1 if inlist(co_municipio, ///
    3501608, /// Americana
    3503802, /// Artur Nogueira
    3509502, /// Campinas
    3512803, /// Cosmopolis
    3515152, /// Engenheiro Coelho
    3519055, /// Holambra
    3519071, /// Hortolandia
    3520509, /// Indaiatuba
    3523404, /// Itatiba
    3524709, /// Jaguariuna
    3531803, /// Monte Mor
    3532009, /// Morungaba
    3533403, /// Nova Odessa
    3536505, /// Paulinia
    3537107, /// Pedreira
    3545803, /// Santa Barbara D'Oeste
    3548005, /// Santo Antonio de Posse
    3552403, /// Sumare
    3556206, /// Valinhos
    3556701)  // Vinhedo
summarize Campinas_metropolitan_region

* Indicator for municipality code 3550308 (the variable name suggests the city of Sao Paulo);
* missing when the municipality code is missing.
gen cidade_SP = (co_municipio == 3550308) if co_municipio != .
summarize cidade_SP

*********************************************************************************
****************   Regressions **************************************************
*********************************************************************************

estimates clear

* Control sets for the individual fixed-effects models (columns 3 to 7), each adding to the previous one:
*   subject and subject-gender dummies -> ENEM x subject polynomials -> ENEM x priority polynomial
*   -> Phase 1 score polynomial -> Phase 1 score x priority polynomial
local ctrl_base   "priority fem_priority $subject $subject_fem"
local ctrl_enem   "`ctrl_base' $g_pol_enem_sub"
local ctrl_eprio  "`ctrl_enem' $g_norm_enem_w_prio"
local ctrl_p1     "`ctrl_eprio' $gs_pol_norm_p1score"
local ctrl_p1prio "`ctrl_p1' $gs_pol_norm_p1score_prio"

* Same seven specifications on each subsample: inside the Campinas metropolitan region (stored as
* Campinas1 to Campinas7) and in all other municipalities (othercit1 to othercit7).
* Standard errors are clustered on inscri2 throughout.
foreach region in 1 0 {
    local prefix = cond(`region' == 1, "Campinas", "othercit")
    local sample "if Campinas_metropolitan_region==`region'"

    * Columns 1 and 2: OLS without individual fixed effects
    reg norm_score female priority fem_priority norm_enem_w_g `sample', cluster(inscri2)
    estimates store `prefix'1
    reg norm_score female priority fem_priority norm_enem_w_g $subject $subject_fem `sample', cluster(inscri2)
    estimates store `prefix'2

    * Columns 3 to 7: fixed effects for inscri2 absorbed, controls added cumulatively
    local col = 3
    foreach ctrl in ctrl_base ctrl_enem ctrl_eprio ctrl_p1 ctrl_p1prio {
        reghdfe norm_score ``ctrl'' `sample', cluster(inscri2) absorb(inscri2)
        estimates store `prefix'`col'
        local ++col
    }
}

* Yes/No indicator rows for the table footer.
* Each row switches from No to Yes at a given column and stays Yes in all later columns,
* identically for the Campinas and the other-cities models:
*   sub_fe and subgender_fe from column 2, ind_fe from 3, enemsub from 4,
*   enemprio_pol4 from 5, p1score_pol4 from 6, p1scoreprio_pol4 from 7.
local rows      "sub_fe subgender_fe ind_fe enemsub enemprio_pol4 p1score_pol4 p1scoreprio_pol4"
local first_yes "2 2 3 4 5 6 7"

forvalues j = 1/7 {
    local row  : word `j' of `rows'
    local from : word `j' of `first_yes'
    foreach prefix in Campinas othercit {
        forvalues col = 1/7 {
            local flag = cond(`col' >= `from', "Yes", "No")
            estadd local `row' "`flag'": `prefix'`col'
        }
    }
}

* Comparing coefficients
* Pooled model: every regressor of the richest specification is interacted with the region indicator,
* so the region x fem_priority term measures the difference in that coefficient between the two subsamples.
reghdfe norm_score ///
    i.Campinas_metropolitan_region##c.(priority fem_priority $subject $subject_fem ///
        $g_pol_enem_sub $g_norm_enem_w_prio $gs_pol_norm_p1score $gs_pol_norm_p1score_prio), ///
    cluster(inscri2) absorb(inscri2)

* Two-sided p-value of the t statistic on the interaction term, using the model's residual degrees of freedom
local diff_term "1.Campinas_metropolitan_region#c.fem_priority"
tempname t_ratio
scalar `t_ratio' = _b[`diff_term'] / _se[`diff_term']
local pval = 2 * ttail(e(df_r), abs(`t_ratio'))
display `pval'

* Attach the p-value to the first Campinas model so that it is printed once, in the first table column
estimates restore Campinas1
estadd scalar p_value_coefs = `pval'
estimates store Campinas1

* Tex
* Writes the 14 stored models (7 Campinas, 7 other cities) to a LaTeX fragment.
* Only the four headline coefficients are printed; all controls are summarised by the Yes/No rows.
* The stats named line and sep are not stored by any model and therefore print as blank spacer rows.
* fmt() carries exactly one format per entry of stats() (13 each).
esttab Campinas* othercit* using "Output/p_Campinas_metropolitanregion_normscore.tex", ///
    se star(* 0.10 ** 0.05 *** 0.01) nogap ///
    stats(p_value_coefs line r2_a N N_clust sep ///
          sub_fe subgender_fe ind_fe enemsub enemprio_pol4 p1score_pol4 p1scoreprio_pol4, ///
          fmt(%9.3fc %1s %9.3fc %9.0fc %9.0fc %1s %3s %3s %3s %3s %3s %3s %3s) ///
          labels("P-value (Female $\times$ Priority)" " " "$\bar{R}^2$" "Number of observations" ///
                 "Number of applicants" " " "Subject FE" "Subject-gender FE" "Individual FE" ///
                 "ENEM $\times$ Subject FE" "ENEM $\times$ Priority" "Phase 1 scores" ///
                 "Phase 1 scores $\times$ Priority")) ///
    b(%7.3f) se(%7.3f) booktabs replace f label nomtitle collabels(none) ///
    keep(female priority fem_priority norm_enem_w_g) ///
    mgroups("Campinas metropolitan region" "Other cities", ///
            pattern(1 0 0 0 0 0 0 1 0 0 0 0 0 0) ///
            prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
    refcat(female " \\ \multicolumn{15}{l}{\textit{Dependent variable: Phase 2 normalized subject-specific scores}} \\", nolabel)
